import scrapy
from scrapy.http import Request

#  --- Douban Top 250 list pages ---

class DoubanSpider(scrapy.Spider):
    """Crawl the Douban Top 250 movie list pages.

    Starts at the first list page, prints the title and link of every movie
    anchor found on it, and follows the "next" pagination link until a page
    no longer contains one.
    """

    name = 'douban'
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Print the movies listed on this page and schedule the next page.

        Args:
            response: The downloaded list page.

        Yields:
            A ``Request`` for the next list page, with this method as its
            callback, when the page contains a "next" link.
        """
        # Each movie's title anchor sits directly inside <div class="hd">.
        for a in response.xpath('//div[@class="hd"]/a'):
            # The first <span> holds the movie name; the href is its link.
            print(a.xpath('span[1]/text()').extract(), a.xpath('@ href').extract())

        # Link to the next page; absent when there is no "next" anchor.
        next_href = response.xpath('//span[@class="next"]/a/@href').extract_first()
        if next_href:
            # Resolve the href against the current page URL rather than
            # gluing it onto a hard-coded base, so relative and absolute
            # hrefs are both handled correctly.
            yield Request(url=response.urljoin(next_href), callback=self.parse)

